{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:25.707007Z",
     "start_time": "2021-09-27T04:42:25.444943Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:26.369344Z",
     "start_time": "2021-09-27T04:42:26.138115Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
       "<td>19 days 21 hours 28 mins</td></tr>\n",
       "<tr><td>H2O_cluster_timezone:</td>\n",
       "<td>Asia/Shanghai</td></tr>\n",
       "<tr><td>H2O_data_parsing_timezone:</td>\n",
       "<td>UTC</td></tr>\n",
       "<tr><td>H2O_cluster_version:</td>\n",
       "<td>3.32.1.5</td></tr>\n",
       "<tr><td>H2O_cluster_version_age:</td>\n",
       "<td>1 month and 22 days </td></tr>\n",
       "<tr><td>H2O_cluster_name:</td>\n",
       "<td>H2O_from_python_caihengxing_5p2i4b</td></tr>\n",
       "<tr><td>H2O_cluster_total_nodes:</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O_cluster_free_memory:</td>\n",
       "<td>22.70 Gb</td></tr>\n",
       "<tr><td>H2O_cluster_total_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_allowed_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_status:</td>\n",
       "<td>locked, healthy</td></tr>\n",
       "<tr><td>H2O_connection_url:</td>\n",
       "<td>http://localhost:54321</td></tr>\n",
       "<tr><td>H2O_connection_proxy:</td>\n",
       "<td>{\"http\": null, \"https\": null}</td></tr>\n",
       "<tr><td>H2O_internal_security:</td>\n",
       "<td>False</td></tr>\n",
       "<tr><td>H2O_API_Extensions:</td>\n",
       "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
       "<tr><td>Python_version:</td>\n",
       "<td>3.7.3 final</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ------------------------------------------------------------------\n",
       "H2O_cluster_uptime:         19 days 21 hours 28 mins\n",
       "H2O_cluster_timezone:       Asia/Shanghai\n",
       "H2O_data_parsing_timezone:  UTC\n",
       "H2O_cluster_version:        3.32.1.5\n",
       "H2O_cluster_version_age:    1 month and 22 days\n",
       "H2O_cluster_name:           H2O_from_python_caihengxing_5p2i4b\n",
       "H2O_cluster_total_nodes:    1\n",
       "H2O_cluster_free_memory:    22.70 Gb\n",
       "H2O_cluster_total_cores:    40\n",
       "H2O_cluster_allowed_cores:  40\n",
       "H2O_cluster_status:         locked, healthy\n",
       "H2O_connection_url:         http://localhost:54321\n",
       "H2O_connection_proxy:       {\"http\": null, \"https\": null}\n",
       "H2O_internal_security:      False\n",
       "H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n",
       "Python_version:             3.7.3 final\n",
       "--------------------------  ------------------------------------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import h2o\n",
    "from h2o.automl import H2OAutoML\n",
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:30.869719Z",
     "start_time": "2021-09-27T04:42:30.865534Z"
    }
   },
   "outputs": [],
   "source": [
    "train_path = '../autox/data/ventilator/train.csv'\n",
    "test_path = '../autox/data/ventilator/test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:33.844789Z",
     "start_time": "2021-09-27T04:42:32.024512Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "# Load data into H2O\n",
    "train = h2o.import_file(train_path)\n",
    "test  = h2o.import_file(test_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:33.912469Z",
     "start_time": "2021-09-27T04:42:33.871466Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th style=\"text-align: right;\">  id</th><th style=\"text-align: right;\">  breath_id</th><th style=\"text-align: right;\">  R</th><th style=\"text-align: right;\">  C</th><th style=\"text-align: right;\">  time_step</th><th style=\"text-align: right;\">     u_in</th><th style=\"text-align: right;\">  u_out</th><th style=\"text-align: right;\">  pressure</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td style=\"text-align: right;\">   1</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0        </td><td style=\"text-align: right;\"> 0.083334</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">   5.83749</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   2</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.0336523</td><td style=\"text-align: right;\">18.383   </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">   5.90779</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   3</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.0675144</td><td style=\"text-align: right;\">22.5093  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">   7.87625</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   4</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.101542 </td><td style=\"text-align: right;\">22.8088  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  11.7429 </td></tr>\n",
       "<tr><td style=\"text-align: right;\">   5</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.135756 </td><td style=\"text-align: right;\">25.3559  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  12.235  </td></tr>\n",
       "<tr><td style=\"text-align: right;\">   6</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.169698 </td><td style=\"text-align: right;\">27.2599  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  12.8677 </td></tr>\n",
       "<tr><td style=\"text-align: right;\">   7</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.203708 </td><td style=\"text-align: right;\">27.1275  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  14.6956 </td></tr>\n",
       "<tr><td style=\"text-align: right;\">   8</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.237723 </td><td style=\"text-align: right;\">26.8077  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  15.8907 </td></tr>\n",
       "<tr><td style=\"text-align: right;\">   9</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.271776 </td><td style=\"text-align: right;\">27.8647  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  15.5392 </td></tr>\n",
       "<tr><td style=\"text-align: right;\">  10</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\"> 20</td><td style=\"text-align: right;\"> 50</td><td style=\"text-align: right;\">  0.305732 </td><td style=\"text-align: right;\">28.313   </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">  15.7501 </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T04:42:52.037544Z",
     "start_time": "2021-09-27T04:42:52.033740Z"
    }
   },
   "outputs": [],
   "source": [
    "y = \"pressure\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:01.212414Z",
     "start_time": "2021-09-27T04:42:59.908492Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AutoML progress: |████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "aml2 = H2OAutoML(max_runtime_secs = 7200, seed = 1, project_name = \"kaggle_ventilator\")\n",
    "aml2.train(y = y, training_frame = train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:19.632210Z",
     "start_time": "2021-09-27T06:33:01.238538Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "stackedensemble prediction progress: |████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "pred = aml2.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:22.749277Z",
     "start_time": "2021-09-27T06:33:19.670279Z"
    }
   },
   "outputs": [],
   "source": [
    "sub = pd.read_csv(test_path)\n",
    "sub = sub[['id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:26.551904Z",
     "start_time": "2021-09-27T06:33:22.785528Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "pred = h2o.as_list(pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:27.454824Z",
     "start_time": "2021-09-27T06:33:26.592921Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sub[y] = pred['predict']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-27T06:33:41.225838Z",
     "start_time": "2021-09-27T06:33:27.497241Z"
    }
   },
   "outputs": [],
   "source": [
    "sub.to_csv(\"./h2o_sub_kaggle_ventilator.csv\", index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
